Summary

The following document includes the code required to generate supplementary figures for the paper:

“Spatial Transcriptomic Profiling of the Human Fallopian Tube Epithelium Reveals Region-specific Gene Expression Patterns”

This document uses a GeoMx dataset that has already undergone Quality control and normalization.

For details, see: geomx_dataset_and_normalization.Rmd

0 Setup

0.1 Load Packages

The following packages are needed for this document.

# 
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")

# The following initializes most up to date version of Bioc
# BiocManager::install(version="3.15")
# 
# BiocManager::install("NanoStringNCTools")
# BiocManager::install("GeomxTools")
# BiocManager::install("GeoMxWorkflows")

# Note:
# Needed to install package lme4, numderiv
library(NanoStringNCTools)
library(GeomxTools)
library(GeoMxWorkflows)

if(packageVersion("GeomxTools") < "2.1" & 
   packageVersion("GeoMxWorkflows") >= "1.0.1"){
    stop("GeomxTools and Workflow versions do not match. Please use the same version. 
    This workflow is meant to be used with most current version of packages. 
    If you are using an older version of Bioconductor please reinstall GeoMxWorkflows and use vignette(GeoMxWorkflows) instead")
}

if(packageVersion("GeomxTools") > "2.1" & 
   packageVersion("GeoMxWorkflows") <= "1.0.1"){
    stop("GeomxTools and Workflow versions do not match. 
         Please use the same version, see install instructions above.")
    
    # to remove current package version
        # remove.packages("GeomxTools")
        # remove.packages("GeoMxWorkflows")
    # see install instructions above 
}
# for file names
library(here)

# read in xl files 
library(readxl)


# Need for UMAP, tSNE plots
library(umap)
library(Rtsne)

# Needed for the heatmap plotting
library(ComplexHeatmap)  #BiocManager::install("ComplexHeatmap")

#Needed for volcano plotting functions

library(ggrepel)
library(ggplot2)
library(latex2exp)
library(plyr)
library(dplyr)


# Read in images and convert to ggplot object

library(magick)


# used for some color palettes  
library(RColorBrewer)
library(stringi)
library(paletteer)
library(wesanderson)


# For significance
library(ggpubr)


# For Stitching multiple plots together
library(patchwork)
library(cowplot)
library(grid)

library(tidyr)

library(readr) # needed for read_csv()


library(stringr)

For space purposes, some functions are moved to an outside document “functions.R”.

0.2 Load the Dataset and all Analyses Used

# make sure to use correct "here" function.

load(here::here("all_q_norm.Rdata"))

load(here::here("disc_q_norm.Rdata"))

load(here::here("valid_q_norm.Rdata"))


dim(all_q_norm)
## Features  Samples 
##     1012      153
dim(disc_q_norm)
## Features  Samples 
##     1026       77
dim(valid_q_norm)
## Features  Samples 
##      999       74

Refactor the GeoMx objects to use simplified region names.

factor_region <- function(GeoMxObj = all_q_norm){
  pData(GeoMxObj)$region <- factor(pData(GeoMxObj)$region, levels = c("Fimbria", "Infundibulum", "Ampulla", "Isthmus"))
  
  pData(GeoMxObj)$region <- revalue(pData(GeoMxObj)$region, c("Fimbria"="Fim", "Infundibulum"="Inf", "Ampulla" = "Amp", "Isthmus" = "Isth"))
  
  return(GeoMxObj)
}


all_q_norm <- all_q_norm |> factor_region()

disc_q_norm <- disc_q_norm |> factor_region()

valid_q_norm <- valid_q_norm |> factor_region()

Subset the “all” dataset into discovery and validation datasets.

The major difference between these datasets and the “disc_q_norm” and “valid_q_norm” is that the latter were normalizzed separately. In contrast, all_disc and all_valid were normalized together and comparisons of gene expression between the two datasets are valid.

all_disc <- all_q_norm[, pData(all_q_norm)$Patient %in% c("P1", "P2", "P3")]

`%ni%` <- Negate(`%in%`)

all_valid <- all_q_norm[, pData(all_q_norm)$Patient %ni% c("P1", "P2", "P3")]

0.3 Load Stat Analyses

Load files required for volcano plots

stat_files <- paste0(getwd(), "/stat_files")

stat_files_all <- paste0(stat_files, "/ALL")
stat_files_disc <- paste0(stat_files, "/Discovery")
stat_files_valid <- paste0(stat_files, "/Validation")

#secretory cells
pairwise_s_disc <- paste0(stat_files_disc, "/0.10_SECRETORY_Pairwise_region_comparison.xlsx")
pairwise_s_valid <- paste0(stat_files_valid, "/Validation_SECRETORY_Pairwise_region_comparison.xlsx")
pairwise_s_all <- paste0(stat_files_all, "/ALL_SECRETORY_Pairwise_region_comparison.xlsx")
# ciliated cells 
pairwise_c_disc <- paste0(stat_files_disc, "/0.10_CILIATED_Pairwise_region_comparison.xlsx")
pairwise_c_valid <- paste0(stat_files_valid, "/Validation_CILIATED_Pairwise_region_comparison.xlsx")
pairwise_c_all <- paste0(stat_files_all, "/ALL_CILIATED_Pairwise_region_comparison.xlsx")

# create a nested list structure to store all of the dataframes in one object

pairwise_comparisons <- list()

Create list of pairwise comparisons

pairwise_comparisons[["disc"]][["sec"]] <- get_all_comparions(path = pairwise_s_disc)
pairwise_comparisons[["valid"]][["sec"]] <- get_all_comparions(path = pairwise_s_valid)
pairwise_comparisons[["all"]][["sec"]] <- get_all_comparions(path = pairwise_s_all)

pairwise_comparisons[["disc"]][["cil"]] <- get_all_comparions(path = pairwise_c_disc)
pairwise_comparisons[["valid"]][["cil"]] <- get_all_comparions(path = pairwise_c_valid)
pairwise_comparisons[["all"]][["cil"]] <- get_all_comparions(path = pairwise_c_all)

Loading One vs Other Comparisons

In this comparison, one region is comared to the average of all other regions, for four total comparisons

  1. fimbria v others
  2. Inf v others
  3. amp v others
  4. isth v others

Excel doc locations

one_v_other_s_disc <- paste0(stat_files_disc, "/0.10_SECRETORY_One.VS.Others_region_comparison.xlsx")
one_v_other_s_valid <- paste0(stat_files_valid, "/Validation_SECRETORY_One.VS.Others_region_comparison.xlsx")
one_v_other_s_all <- paste0(stat_files_all, "/ALL_SECRETORY_One.VS.Others_region_comparison.xlsx")

one_v_other_c_disc <- paste0(stat_files_disc, "/0.10_CILIATED_One.VS.Others_region_comparison.xlsx")
one_v_other_c_valid <- paste0(stat_files_valid, "/Validation_CILIATED_One.VS.Others_region_comparison.xlsx")
one_v_other_c_all <- paste0(stat_files_all, "/ALL_CILIATED_One.VS.Others_region_comparison.xlsx")
one_v_other <- list()


comparison_one_v_other <- c("Fimbria.vs.Others", "Infundibulum.vs.Others", "Ampulla.vs.Others", "Isthmus.vs.Others")

one_v_other_names <- c("fim_v_other", "inf_v_other", "amp_v_other", "isth_v_other")

one_v_other[["disc"]][["sec"]] <- get_all_comparions(path = one_v_other_s_disc, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["valid"]][["sec"]] <- get_all_comparions(path = one_v_other_s_valid, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["all"]][["sec"]] <- get_all_comparions(path = one_v_other_s_all, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)


one_v_other[["disc"]][["cil"]] <- get_all_comparions(path = one_v_other_c_disc, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["valid"]][["cil"]] <- get_all_comparions(path = one_v_other_c_valid, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["all"]][["cil"]] <- get_all_comparions(path = one_v_other_c_all, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)

S1. Supplemental Figure 1

OVGP1 staining of the FT as a proxy for menstrual cycle status.

A is an image and B is a table and are not included.

Supplemental Figure 1C is generated below.

OVGP1 Data

OVGP1_percent <- read_xlsx(path = paste0(getwd(), "/IHC_analysis/OVGP1 Percentages.xlsx"))

OVGP1 <- OVGP1_percent |> pivot_longer(cols = c("Fim", "Inf", "Amp", "Isth"), names_to = "region", values_to = "percent") |> 
  mutate(percent = suppressWarnings(as.numeric(percent))) # some values are coerced to NA, but this is desired, so warning suppressed

OVGP1$region <- OVGP1$region |> factor(levels = c("Fim", "Inf", "Amp", "Isth"))

# remove slides not in GeoMx dataset 

OVGP1_subset <- OVGP1[OVGP1$patient_ID %in% unique(pData(all_q_norm)$patient_ID),]

# this is necessary to merge with the GeoMx Object. 

OVGP1_subset <- transform(OVGP1_subset, patient_ID = as.double(patient_ID))

OVGP1 Results Boxplot

ggplot(OVGP1_subset, aes(x = region, y = percent, fill = region)) + 
  geom_boxplot()+
  geom_point() + 
  geom_line(aes(group = patient, color = patient), linetype = "dashed", linewidth = 1.2)+
  theme_bw()+
  scale_color_manual(values = c("#B01513FF", "#EA6312FF", "#F9A729FF", "#50C49FFF", "#54849AFF", "darkblue", "#B560D4FF"))+
  scale_fill_manual(values= c("#7CAE00", "#00C1C6", "#F8766D", "#C77CFF"))+
  theme(text=element_text(size=20))+
  labs(y = "Percent cells OVGP1+", x = "Anatomical Region")

S2. GO BP Analysis

Supplemental Figure 2 - Biological Process Gene Ontology Analysis of differentially expressed genes for all region comparisons in the fallopian tube discovery dataset

GO_file <- "/GO_analysis"

# Discovery File Locations

GO_disc_sec <- paste0(getwd(), GO_file, "/Discovery_GO", "/Pairwise/Sec/") 

GO_disc_cil <- paste0(getwd(), GO_file, "/Discovery_GO", "/Pairwise/Cil/") 

# Validation File Locations

GO_valid_sec <- paste0(getwd(), GO_file, "/Validation_GO", "/Pairwise/Sec/") 

GO_valid_cil <- paste0(getwd(), GO_file, "/Validation_GO", "/Pairwise/Cil/") 


# Combined (all) File Locations

GO_all_sec <- paste0(getwd(), GO_file, "/All_GO", "/Pairwise/Sec/") 

GO_all_cil <- paste0(getwd(), GO_file, "/All_GO", "/Pairwise/Cil/") 

FUNCT: get_all_comparisons

This function automatically reads in all pairwise comparisons (if they exist) and assigns them names. Make sure to check that all names exactly match specifications, or they will not be read in.

comparison_pairwise_GO <- c("Fim v Inf", "Fim v Amp", "Fim v Isth", 
                     "Inf v Amp", "Inf v Isth", "Amp v Isth")

pairwise_names_GO <- c("fim_v_inf", "fim_v_amp", "fim_v_isth", "inf_v_amp", "inf_v_isth", "amp_v_isth")

# append_names <- c()

get_all_comparisons_GO <- function(path = GO_disc_cil, comparison_list = comparison_pairwise_GO, comparison_name = pairwise_names_GO, append = " GO BP Cil.csv"){
  path <- paste0(path, comparison_list, append) # create full path names
  
  path_rm <- path[file.exists(path)] # check to see if files exist and remove if not
  
  names_rm <- comparison_name[file.exists(path)] # remove names for files removed
  
  data_list <- lapply(path_rm, read_csv, show_col_types = F) # read in the data files 
  
  names(data_list) <- names_rm # assign names 
  
  return(data_list)
}
# create a nested list structure to store all of the dataframes in one object
GO_pairwise <- list()

GO_pairwise[["disc"]][["sec"]] <- get_all_comparisons_GO(path = GO_disc_sec, append = " GO BP Sec.csv")
GO_pairwise[["valid"]][["sec"]] <- get_all_comparisons_GO(path = GO_valid_sec, append = " GO BP Sec.csv")
GO_pairwise[["all"]][["sec"]] <- get_all_comparisons_GO(path = GO_all_sec , append = " GO BP Sec.csv")

GO_pairwise[["disc"]][["cil"]] <- get_all_comparisons_GO(path = GO_disc_cil, append = " GO BP Cil.csv")
GO_pairwise[["valid"]][["cil"]] <- get_all_comparisons_GO(path = GO_valid_cil, append = " GO BP Cil.csv")
GO_pairwise[["all"]][["cil"]] <- get_all_comparisons_GO(path = GO_all_cil, append = " GO BP Cil.csv")
names(GO_pairwise$disc$sec)
## [1] "fim_v_inf"  "fim_v_amp"  "fim_v_isth" "inf_v_isth" "amp_v_isth"
names(GO_pairwise$valid$sec)
## [1] "fim_v_isth" "inf_v_amp"  "inf_v_isth" "amp_v_isth"
names(GO_pairwise$all$sec)
## [1] "fim_v_inf"  "fim_v_amp"  "inf_v_amp"  "amp_v_isth"
names(GO_pairwise$disc$cil)
## [1] "fim_v_inf"  "fim_v_amp"  "fim_v_isth" "inf_v_isth" "amp_v_isth"
names(GO_pairwise$valid$cil)
## [1] "fim_v_amp"  "fim_v_isth" "inf_v_amp"  "inf_v_isth" "amp_v_isth"
names(GO_pairwise$all$cil)
## [1] "fim_v_inf"  "fim_v_amp"  "inf_v_amp"  "amp_v_isth"
GO_plots <- list()

pairwise_names <- c("fim_v_inf", "fim_v_amp", "fim_v_isth", "inf_v_isth", "amp_v_isth")

make_GO_plots <- function(dfs = GO_plots$disc$sec){
  
  # get the names of all data frames in the set

  GO_names <- names(dfs)
  
  # for each name, generate a GO plot
  output_graphs <- lapply(GO_names, 
                            function(x){
                              plot_GO(data = dfs[[x]], title = x)
                              })
  # assign names to output
  names(output_graphs) <- GO_names
  
  return(output_graphs)
}



GO_plots$disc$sec <- make_GO_plots(dfs = GO_pairwise$disc$sec)

GO_plots$disc$cil <- make_GO_plots(dfs = GO_pairwise$disc$cil)

GO_plots$valid$sec <- make_GO_plots(dfs = GO_pairwise$valid$sec)

GO_plots$valid$cil <- make_GO_plots(dfs = GO_pairwise$valid$cil)

GO_plots$all$sec <- make_GO_plots(dfs = GO_pairwise$all$sec)

GO_plots$all$cil <- make_GO_plots(dfs = GO_pairwise$all$cil)
top_1 <-   grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset: GO Plots for Secretory Cells', gp=gpar(fontsize=25)))

GO_plot_disc_sec <- GO_plots$disc$sec$fim_v_inf + GO_plots$disc$sec$fim_v_amp + GO_plots$disc$sec$fim_v_isth + 
  grid::textGrob('No pathways enriched \n Infundibulum v Ampulla', gp=gpar(fontsize=25)) + GO_plots$disc$sec$inf_v_isth + GO_plots$disc$sec$amp_v_isth +
  plot_annotation(tag_levels = list("a")) & theme(plot.tag  = element_text(size = 25)) 

top_2 <-   grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset: GO Plots for Ciliated Cells', gp=gpar(fontsize=25)))

GO_plot_disc_cil <- 
GO_plots$disc$cil$fim_v_inf + GO_plots$disc$cil$fim_v_amp + GO_plots$disc$cil$fim_v_isth + 
  grid::textGrob('No pathways enriched \n Infundibulum v Ampulla', gp=gpar(fontsize=25)) + GO_plots$disc$cil$inf_v_isth + GO_plots$disc$cil$amp_v_isth +
  plot_annotation(tag_levels = list("g", "h", "i", "j", "k", "l")) & theme(plot.tag  = element_text(size = 25)) 


wrap_elements(top_1) / (GO_plot_disc_sec) / wrap_elements(top_2) / GO_plot_disc_cil + 
  plot_layout(heights = c(0.2, 4, 0.2, 4))

wrap_elements(top_1) / (GO_plots$disc$sec$fim_v_inf + GO_plots$disc$sec$fim_v_amp + GO_plots$disc$sec$fim_v_isth + 
  grid::textGrob('No pathways enriched \n Infundibulum v Ampulla', gp=gpar(fontsize=25)) + GO_plots$disc$sec$inf_v_isth + GO_plots$disc$sec$amp_v_isth) / wrap_elements(top_2) / (GO_plots$disc$cil$fim_v_inf + GO_plots$disc$cil$fim_v_amp + GO_plots$disc$cil$fim_v_isth + 
  grid::textGrob('No pathways enriched \n Infundibulum v Ampulla', gp=gpar(fontsize=25)) + GO_plots$disc$cil$inf_v_isth + GO_plots$disc$cil$amp_v_isth)  + 
  plot_layout(heights = c(0.2, 4, 0.2, 4)) +
  plot_annotation(tag_levels = list(c("", "a", "b", "c", "d", "e", "f", "", "g", "h", "i", "j", "k", "l"))) & theme(plot.tag  = element_text(size = 25)) 

S3. Pairwise Comparison of all Regions (Discovery Cohort)

Supplementary Figure 3 – Pairwise Comparisons of all Regions (Discovery Dataset).

Generate all pairwise comparisons for the discovery dataset for both secretory and ciliated cells.

df1 <- pairwise_comparisons$valid$sec

df2 <- pairwise_comparisons$valid$cil


# cil_fim_v_inf <- get_common_genes(df_1 = df1$fim_v_inf, df_2 = df2$fim_v_inf, type = "both", FC_cutoff = 0.2)
# cil_fim_v_amp <- get_common_genes(df_1 = df1$fim_v_amp, df_2 = df2$fim_v_amp, type = "both", FC_cutoff = 0.2)
# cil_fim_v_isth <- get_common_genes(df_1 = df1$fim_v_isth, df_2 = df2$fim_v_isth, type = "both", FC_cutoff = 0.2)
# cil_inf_v_amp <- get_common_genes(df_1 = df1$inf_v_amp, df_2 = df2$inf_v_amp, type = "both", FC_cutoff = 0.2)
# cil_inf_v_isth <- get_common_genes(df_1 = df1$inf_v_isth, df_2 = df2$inf_v_isth, type = "both", FC_cutoff = 0.2)
# cil_amp_v_isth <- get_common_genes(df_1 = df1$amp_v_isth, df_2 = df2$amp_v_isth, type = "both", FC_cutoff = 0.2)



title = 20
n_genes = 20
lab_size = 5


# DISCOVERY 
a <- volcano_plot(df = df1$fim_v_inf, 
                 title_name = "Fim v Inf, Secretory, Discovery", 
                 title_size = title, upreg = "Fimbria", downreg = "Infundibulum"
                 # highlight_genes = cil_fim_v_inf
                 ) |> add_labels(df = df1$fim_v_inf, n_genes = n_genes, lab_size = lab_size) |> remove_legend()


b <- volcano_plot(df = df1$fim_v_amp, 
                 title_name = "Fim v Amp, Secretory, Discovery", 
                 title_size = title, upreg = "Fimbria", downreg = "Ampulla"
                 # highlight_genes = cil_fim_v_amp
                 ) |> add_labels(df = df1$fim_v_amp, n_genes = n_genes, lab_size = lab_size)

c <- volcano_plot(df = df1$fim_v_isth, 
                 title_name = "Fim v Isth, Secretory, Discovery", 
                 title_size = title, upreg = "Fimbria", downreg = "Isthmus"
                 # highlight_genes = cil_fim_v_isth
                 ) |> add_labels(df = df1$fim_v_isth, n_genes = n_genes, lab_size = lab_size)

d <- volcano_plot(df = df1$inf_v_amp, 
                 title_name = "Inf v Amp, Secretory, Discovery", 
                 title_size = title, upreg = "Infundibulum", downreg = "Ampulla"
                 # highlight_genes = cil_inf_v_amp
                 ) |> add_labels(df = df1$inf_v_amp, n_genes = n_genes, lab_size = lab_size) |> remove_legend()


e <- volcano_plot(df = df1$inf_v_isth, 
                 title_name = "Inf v Isth, Secretory, Discovery", 
                 title_size = title, upreg = "Infundibulum", downreg = "Isthmus"
                 # highlight_genes = cil_inf_v_isth
                 ) |> add_labels(df = df1$inf_v_isth, n_genes = n_genes, lab_size = lab_size)

f <- volcano_plot(df = df1$amp_v_isth, 
                 title_name = "Amp v Isth, Secretory, Discovery", 
                 title_size = title, upreg = "Ampulla", downreg = "Isthmus"
                 # highlight_genes = cil_amp_v_isth
                 ) |> add_labels(df = df1$amp_v_isth, n_genes = n_genes, lab_size = lab_size)


# Validation

g <- volcano_plot(df = df2$fim_v_inf, 
                 title_name = "Fimb v Inf, Ciliated, Discovery", 
                 title_size = title, upreg = "Fimbria", downreg = "Infundibulum"
                 # highlight_genes = cil_fim_v_inf
                 ) |> add_labels(df = df2$fim_v_inf, n_genes = n_genes, lab_size = lab_size) |> remove_legend()


h <- volcano_plot(df = df2$fim_v_amp, 
                 title_name = "Fimb v Amp, Ciliated, Discovery", 
                 title_size = title, upreg = "Fimbria", downreg = "Ampulla"
                 # highlight_genes = cil_fim_v_amp
                 ) |> add_labels(df = df2$fim_v_amp, n_genes = n_genes, lab_size = lab_size)

i <- volcano_plot(df = df2$fim_v_isth, 
                 title_name = "Fimb v Isth, Ciliated, Discovery", 
                 title_size = title, upreg = "Fimbria", downreg = "Isthmus"
                 # highlight_genes = cil_fim_v_isth
                 ) |> add_labels(df = df2$fim_v_isth, n_genes = n_genes, lab_size = lab_size)

j <- volcano_plot(df = df2$inf_v_amp, 
                 title_name = "Inf v Amp, Ciliated, Discovery", 
                 title_size = title, upreg = "Infundibulum", downreg = "Ampulla"
                 # highlight_genes = cil_inf_v_amp
                 ) |> add_labels(df = df2$inf_v_amp, n_genes = n_genes, lab_size = lab_size) |> remove_legend()


k <- volcano_plot(df = df2$inf_v_isth, 
                 title_name = "Inf v Isth, Ciliated, Discovery", 
                 title_size = title, upreg = "Infundibulum", downreg = "Isthmus"
                 # highlight_genes = cil_inf_v_isth
                 ) |> add_labels(df = df2$inf_v_isth, n_genes = n_genes, lab_size = lab_size)

l <- volcano_plot(df = df2$amp_v_isth, 
                 title_name = "Amp v Isth, Ciliated, Discovery", 
                 title_size = title, upreg = "Ampulla", downreg = "Isthmus"
                 # highlight_genes = cil_amp_v_isth
                 ) |> add_labels(df = df2$amp_v_isth, n_genes = n_genes, lab_size = lab_size)


top <-   grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset, Secretory Cells', gp=gpar(fontsize=25)))

bottom <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset, Ciliated Cells', gp=gpar(fontsize=25)))

wrap_elements(top) / ((a | b | c ) / (d | e | f)) / wrap_elements(bottom) / ((g | h | i ) / (j | k | l)) + plot_layout(heights = c(1,15,1,15), guides = "collect") + plot_annotation(tag_levels = list(c("", "a", "b", "c", "d", "e", "f", "", "g", "h", "i", "j", "k", "l"))) & 
  theme(plot.tag = element_text(size = 26))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#S4.Pairwise Comparisons volcano plots (Validation Cohort).

Supplementary Figure 4 – Pairwise Comparisons of all Regions (Validation Cohort).

df1 <- pairwise_comparisons$valid$sec

df2 <- pairwise_comparisons$valid$cil


# cil_fim_v_inf <- get_common_genes(df_1 = df1$fim_v_inf, df_2 = df2$fim_v_inf, type = "both", FC_cutoff = 0.2)
# cil_fim_v_amp <- get_common_genes(df_1 = df1$fim_v_amp, df_2 = df2$fim_v_amp, type = "both", FC_cutoff = 0.2)
# cil_fim_v_isth <- get_common_genes(df_1 = df1$fim_v_isth, df_2 = df2$fim_v_isth, type = "both", FC_cutoff = 0.2)
# cil_inf_v_amp <- get_common_genes(df_1 = df1$inf_v_amp, df_2 = df2$inf_v_amp, type = "both", FC_cutoff = 0.2)
# cil_inf_v_isth <- get_common_genes(df_1 = df1$inf_v_isth, df_2 = df2$inf_v_isth, type = "both", FC_cutoff = 0.2)
# cil_amp_v_isth <- get_common_genes(df_1 = df1$amp_v_isth, df_2 = df2$amp_v_isth, type = "both", FC_cutoff = 0.2)



title = 20
n_genes = 20
lab_size = 5


# DISCOVERY 
a <- volcano_plot(df = df1$fim_v_inf, 
                 title_name = "Fim v Inf, Secretory, Validation", 
                 title_size = title, upreg = "Fimbria", downreg = "Infundibulum"
                 # highlight_genes = cil_fim_v_inf
                 ) |> add_labels(df = df1$fim_v_inf, n_genes = n_genes, lab_size = lab_size) |> remove_legend()


b <- volcano_plot(df = df1$fim_v_amp, 
                 title_name = "Fim v Amp, Secretory, Validation", 
                 title_size = title, upreg = "Fimbria", downreg = "Ampulla"
                 # highlight_genes = cil_fim_v_amp
                 ) |> add_labels(df = df1$fim_v_amp, n_genes = n_genes, lab_size = lab_size)

c <- volcano_plot(df = df1$fim_v_isth, 
                 title_name = "Fim v Isth, Secretory, Validation", 
                 title_size = title, upreg = "Fimbria", downreg = "Isthmus"
                 # highlight_genes = cil_fim_v_isth
                 ) |> add_labels(df = df1$fim_v_isth, n_genes = n_genes, lab_size = lab_size)

d <- volcano_plot(df = df1$inf_v_amp, 
                 title_name = "Inf v Amp, Secretory, Validation", 
                 title_size = title, upreg = "Infundibulum", downreg = "Ampulla"
                 # highlight_genes = cil_inf_v_amp
                 ) |> add_labels(df = df1$inf_v_amp, n_genes = n_genes, lab_size = lab_size) |> remove_legend()


e <- volcano_plot(df = df1$inf_v_isth, 
                 title_name = "Inf v Isth, Secretory, Validation", 
                 title_size = title, upreg = "Infundibulum", downreg = "Isthmus"
                 # highlight_genes = cil_inf_v_isth
                 ) |> add_labels(df = df1$inf_v_isth, n_genes = n_genes, lab_size = lab_size)

f <- volcano_plot(df = df1$amp_v_isth, 
                 title_name = "Amp v Isth, Secretory, Validation", 
                 title_size = title, upreg = "Ampulla", downreg = "Isthmus"
                 # highlight_genes = cil_amp_v_isth
                 ) |> add_labels(df = df1$amp_v_isth, n_genes = n_genes, lab_size = lab_size)


# Validation

g <- volcano_plot(df = df2$fim_v_inf, 
                 title_name = "Fimb v Inf, Ciliated, Validation", 
                 title_size = title, upreg = "Fimbria", downreg = "Infundibulum"
                 # highlight_genes = cil_fim_v_inf
                 ) |> add_labels(df = df2$fim_v_inf, n_genes = n_genes, lab_size = lab_size) |> remove_legend()


h <- volcano_plot(df = df2$fim_v_amp, 
                 title_name = "Fimb v Amp, Ciliated, Validation", 
                 title_size = title, upreg = "Fimbria", downreg = "Ampulla"
                 # highlight_genes = cil_fim_v_amp
                 ) |> add_labels(df = df2$fim_v_amp, n_genes = n_genes, lab_size = lab_size)

i <- volcano_plot(df = df2$fim_v_isth, 
                 title_name = "Fimb v Isth, Ciliated, Validation", 
                 title_size = title, upreg = "Fimbria", downreg = "Isthmus"
                 # highlight_genes = cil_fim_v_isth
                 ) |> add_labels(df = df2$fim_v_isth, n_genes = n_genes, lab_size = lab_size)

j <- volcano_plot(df = df2$inf_v_amp, 
                 title_name = "Inf v Amp, Ciliated, Validation", 
                 title_size = title, upreg = "Infundibulum", downreg = "Ampulla"
                 # highlight_genes = cil_inf_v_amp
                 ) |> add_labels(df = df2$inf_v_amp, n_genes = n_genes, lab_size = lab_size) |> remove_legend()


k <- volcano_plot(df = df2$inf_v_isth, 
                 title_name = "Inf v Isth, Ciliated, Validation", 
                 title_size = title, upreg = "Infundibulum", downreg = "Isthmus"
                 # highlight_genes = cil_inf_v_isth
                 ) |> add_labels(df = df2$inf_v_isth, n_genes = n_genes, lab_size = lab_size)

l <- volcano_plot(df = df2$amp_v_isth, 
                 title_name = "Amp v Isth, Ciliated, Validation", 
                 title_size = title, upreg = "Ampulla", downreg = "Isthmus"
                 # highlight_genes = cil_amp_v_isth
                 ) |> add_labels(df = df2$amp_v_isth, n_genes = n_genes, lab_size = lab_size)


top <-   grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Validation Dataset, Secretory Cells', gp=gpar(fontsize=25)))

bottom <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Validation Dataset, Ciliated Cells', gp=gpar(fontsize=25)))

wrap_elements(top) / ((a | b | c ) / (d | e | f)) / wrap_elements(bottom) / ((g | h | i ) / (j | k | l)) + plot_layout(heights = c(1,15,1,15), guides = "collect") + plot_annotation(tag_levels = list(c("", "a", "b", "c", "d", "e", "f", "", "g", "h", "i", "j", "k", "l"))) & 
  theme(plot.tag = element_text(size = 26))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#S5. Regional comparison volcano plots (validation dataset)

Supplementary Figure 5. Regional comparison volcano plots showing transcripts upregulated and downregulated in secretory and ciliated ROIs in the validation cohort.

valid_sec <- one_v_other$valid$sec

volcano_plot(df = valid_sec$fim_v_other, 
                 title_name = "Fimbria Secretory Cells, Validation", title_size = title, upreg = "Fimbria", downreg = "Others"
                 ) |> add_labels(df = valid_sec$fim_v_other, n_genes = 15) |> remove_legend()

volcano_plot(df = valid_sec$inf_v_other, 
                 title_name = "Infundibulum Secretory Cells, Validation", title_size = title, upreg = "Infundibulum", downreg = "Others"
                 ) |> add_labels(df = valid_sec$inf_v_other, n_genes = 15) |> remove_legend()

volcano_plot(df = valid_sec$amp_v_other, 
                 title_name = "Ampulla Secretory Cells, Validation", title_size = title, upreg = "Ampulla", downreg = "Others"
                 ) |> add_labels(df = valid_sec$amp_v_other, n_genes = 15) |> remove_legend()

volcano_plot(df = valid_sec$isth_v_other, 
                 title_name = "Isthmus Secretory Cells, Validation", title_size = title, upreg = "Isthmus", downreg = "Others"
                 ) |> add_labels(df = valid_sec$isth_v_other, n_genes = 15) |> remove_legend()

valid_cil <- one_v_other$valid$cil

volcano_plot(df = valid_cil$fim_v_other, 
                 title_name = "Fimbria Ciliated Cells, Validation", title_size = title, upreg = "Fimbria", downreg = "Others"
                 ) |> add_labels(df = valid_cil$fim_v_other, n_genes = 15) |> remove_legend()

volcano_plot(df = valid_cil$inf_v_other, 
                 title_name = "Infundibulum Ciliated Cells, Validation", title_size = title, upreg = "Infundibulum", downreg = "Others"
                 ) |> add_labels(df = valid_cil$inf_v_other, n_genes = 15) |> remove_legend()

volcano_plot(df = valid_cil$amp_v_other, 
                 title_name = "Ampulla Ciliated Cells, Validation", title_size = title, upreg = "Ampulla", downreg = "Others"
                 ) |> add_labels(df = valid_cil$amp_v_other, n_genes = 15) |> remove_legend()

volcano_plot(df = valid_cil$isth_v_other, 
                 title_name = "Isthmus Ciliated Cells, Validation", title_size = title, upreg = "Isthmus", downreg = "Others"
                 ) |> add_labels(df = valid_cil$isth_v_other, n_genes = 15) |> remove_legend()

S6. Markers of Mature Ciliated Cells up in distal FT

Supplemental Figure 6 – Markers of Mature Ciliated Cells are upregulated in the distal fallopian tube

S7. Other MHC II Transcripts

Supplementary Figure 7. Other MHC-II Transcripts in the FT epithelium.

S8. Analysis of other FT transcriptome data

Supplementary Figure 8. Multiple datasets show MHC-II transcript expression varies throughout the menstrual cycle and is inversely correlated with OVGP1.

Analysis of publicly available FT transcriptome datasets.

This analysis is too long to fit neatly here, so I have separated it into two files.

For the Sowamber paper (figure S8A and S8B) see Sowamber Paper Analysis.Rmd.

S9. Peg cell markers elevated in the interior FT during follicular phase.

Supplementary Figure 9. Peg cell markers elevated in the interior FT during follicular phase.

S9.1 Create list of Peg cell markers from Ulrich paper

Markers of Non-ciliated epithelial (NCE) cells are available publically from Ulrich 2022. (DOI: 10.1016/j.devcel.2022.02.017)

The paper identifies probable peg cell cluster (OVGP1 producing secretory cells) as 2-5.

## Function to retrieve Gene IDs

pull_gene <- function(df = NULL, cluster_name = NULL){
  out <- df |> filter(cluster == cluster_name) |> pull(gene)
  
  return(out)
}

# Retrieve PEG cell markers identified in this excel document

source_file_NCE <- paste0(getwd(), "/ulrich_files/Ulrich_NCE_Markers.xlsx")


# read in excel document
NCSE_markers <- read_xlsx(source_file_NCE, skip = 4, range = cell_cols("B:H"))

# Filter out top markers (best cell type distinguishing capacity)

SE5_top <- NCSE_markers |> filter(cluster == "2-5")  |> filter((pct.1 - pct.2) > 0.5)  # PEG Cell Markers


# get list of genes in top markers
peg_markers <- SE5_top$gene


# Alternative, for looking at all markers significantly higher in peg cells
# SE5_markers <- NCSE_markers |> pull_gene(cluster_name = "2-5") # ALL PEG Markers

S9.2 t_test

Subset the dataset to look only at secretory cells, then perform t-test comparing regions of interest.

FUNC: t.test_logfold()

summary: perform t-test comparing all genes between two different selected regions.

t.test_logfold <- function(obj, variable_type = "region", upreg_group = "Fimbria", downreg_group = "Isthmus", elt_name = "q_norm"){
  
  # Get locations of all in the "up" group for that variable
  g_up <- which(pData(obj)[variable_type] == upreg_group)
  # get locations of all in the "down" group for that variable
  g_down <- which(pData(obj)[variable_type] == downreg_group)
  
  # Retrieve normalized expression data from the GeoMxDataSet Object
  obj <-  assayDataElement(object = obj, elt = elt_name)
  
  # For each gene (every row), perform a T-test comparing the columns in the 
  #up_group vs down_group
  
  t.test <- apply(obj, 1, 
              function(x) t.test(
                x[g_up],
                x[g_down]))
  
  #calculate the log_2 Fold Change of up_group compared to down for each row
  
  log_fold <- apply(obj, 1,
                    function(x)  log2(mean(x[g_up])/mean(x[g_down]))
                    )
  
  # convert to data frame
  log_fold <- as.data.frame(log_fold)
  
  # retrieve the p.values from the t.test object and store to a datafram

  df <- do.call(rbind, lapply(t.test, function(x) c(
    p_value = unname(x$p.value))))
  
  # create an adjusted p_value using the Bonferooni correction 

  df <- transform(df, padj = p.adjust(p_value, method = "BH"))
  
  # merge the log_fold values with the p_values
  
  output <- merge(df, log_fold, by = 0)
  
  
  # rename the column rownames to Marker.name
  
  output <- output |> dplyr::rename(Marker.name = Row.names)
  #  Add the -log10 of the pvalue for plotting 
  output <- output |> mutate(minuslog10_Pvalue = -log10(p_value))
  
  return(output)
  
}

# t.test_logfold_region(obj = all_q_norm, group_up = "Isth", group_down = "Fim")

Perform t-test

# subset the discovery datset to look only at expression in Secretory Cells 
disc_q_norm_sec <- disc_q_norm[, pData(disc_q_norm)$segment %in% c("Secretory")]

# perform t_tests for all regions

disc_isth.v.fim <- t.test_logfold(obj = disc_q_norm_sec, upreg_group = "Isth", downreg_group = "Fim")

disc_inf.v.fim <- t.test_logfold(obj = disc_q_norm_sec, upreg_group = "Inf", downreg_group = "Fim")

disc_inf.v.amp <- t.test_logfold(obj = disc_q_norm_sec, upreg_group = "Amp", downreg_group = "Inf")

disc_amp.v.isth <- t.test_logfold(obj = disc_q_norm_sec, upreg_group = "Isth", downreg_group = "Amp")

S9 Volcano plots

# A <- volcano_plot(df = disc_isth.v.fim, title_name = "Fimbria vs Isthmus Comparison (Discovery)", highlight_genes = peg_markers, upreg = "Isthmus", downreg = "Fimbria", FC_cutoff =0.5) |> add_labels(df = disc_isth.v.fim, highlight_genes = peg_markers, n_genes = 0)

s9_fim_v_inf <- volcano_plot(df = disc_inf.v.fim, title_name = "Fimbria vs Infundibulum Comparison (Discovery)", highlight_genes = peg_markers, upreg = "Infundibulum", downreg = "Fimbria", FC_cutoff =0.5) |> add_labels(df = disc_inf.v.fim, highlight_genes = peg_markers, n_genes = 0) |> remove_legend() + theme(plot.title = element_text(size=16))

s9_inf_v_amp <- volcano_plot(df = disc_inf.v.amp, title_name = "Infundibulum vs Ampulla Comparison (Discovery)", highlight_genes = peg_markers, upreg = "Ampulla", downreg = "Infundibulum", FC_cutoff =0.5)|> add_labels(df = disc_inf.v.amp, highlight_genes = peg_markers, n_genes = 0) |> remove_legend() + theme(plot.title = element_text(size=16))


s9_amp_v_isth <- volcano_plot(df = disc_amp.v.isth, title_name = "Ampulla vs Isthmus Comparison (Discovery)", highlight_genes = peg_markers, upreg = "Isthmus", downreg = "Ampulla", FC_cutoff =0.5) |> add_labels(df = disc_amp.v.isth, highlight_genes = peg_markers, n_genes = 0) |> remove_legend()  + theme(plot.title = element_text(size=16))

# 
# valid_q_norm_sec <- valid_q_norm[, pData(valid_q_norm)$segment %in% c("Secretory")]
# 
# 
# valid_isth.v.fim <- t.test_logfold(obj = valid_q_norm_sec, upreg_group = "Isth", downreg_group = "Fim")
# 
# valid_inf.v.fim <- t.test_logfold(obj = valid_q_norm_sec, upreg_group = "Inf", downreg_group = "Fim")
# 
# valid_inf.v.amp <- t.test_logfold(obj = valid_q_norm_sec, upreg_group = "Amp", downreg_group = "Inf")
# 
# valid_amp.v.isth <- t.test_logfold(obj = valid_q_norm_sec, upreg_group = "Isth", downreg_group = "Amp")
# 
# 
# Q <- volcano_plot(df = valid_isth.v.fim, title_name = "Fimbria vs Isthmus Comparison (Validation)", highlight_genes = peg_markers, upreg = "Isthmus", downreg = "Fimbria", FC_cutoff =0.5)
# 
# R <- volcano_plot(df = valid_inf.v.fim, title_name = "Fimbria vs Infundibulum Comparison (Validation)", highlight_genes = peg_markers, upreg = "Infundibulum", downreg = "Fimbria", FC_cutoff =0.5)
# 
# S <- volcano_plot(df = valid_inf.v.amp, title_name = "Infundibulum vs Ampulla Comparison (Validation)", highlight_genes = peg_markers, upreg = "Ampulla", downreg = "Infundibulum", FC_cutoff =0.5)
# 
# 
# V <- volcano_plot(df = valid_amp.v.isth, title_name = "Isthmus vs Ampulla Comparison (Validation)", highlight_genes = peg_markers, upreg = "Isthmus", downreg = "Ampulla", FC_cutoff =0.5)


# (s9_fim_v_inf + s9_inf_v_amp + s9_amp_v_isth)

S9.3 boxplots

peg_markers_s9 <- c("LGR5", "NOTCH2", "COL1A2", "SERINC5")

# create settings for boxplots



make_figs9_boxplots <- function(genes = Fig5_genes, dataset = all_q_norm, title = "All"){

  graphs <- lapply(X = genes, FUN = Cil_v_Sec_Anat_Boxplot_Points, dataset = dataset)
  
  # resize
  graph_resize <- lapply(graphs, adj_text_size, pnt = 17)
  graph_remove_legend <- lapply(graph_resize, remove_legend)
  graph_sig <- lapply(graph_remove_legend, add_sig, pnt = 4)

  return(graph_sig)
  
  }



# Create all of the Boxplots for the Discovery Dataset

fs9_disc <- make_figs9_boxplots(genes = peg_markers_s9 , dataset = disc_q_norm)
names(fs9_disc) <- peg_markers_s9

# Create all of the Boxplots for the Validation Dataset

fs9_valid <- make_figs9_boxplots(genes = peg_markers_s9 , dataset = valid_q_norm)
names(fs9_valid) <- peg_markers_s9

S9.4 Final Plot

# Create Grobs for use as labels

top <-   grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset: OVGP1+ (peg cell) transcripts', gp=gpar(fontsize=25)))

bottom <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Validation Dataset: OVGP1+ (peg cell) transcripts', gp=gpar(fontsize=25)))


# create legends

legend_disc <- make_boxplot_legend(dataset = disc_q_norm)

legend_valid <- make_boxplot_legend(dataset = valid_q_norm)


  # Part 1 - volcano plots
  (s9_fim_v_inf | s9_inf_v_amp | s9_amp_v_isth) /

  # Part 2 - disc PEG Transcripts
  wrap_elements(top) / 
  (fs9_disc$`LGR5` | fs9_disc$`NOTCH2` |  fs9_disc$`COL1A2` | fs9_disc$`SERINC5`) /
  legend_disc /
   
  # Part 3 - valid PEG Transcripts  
  wrap_elements(bottom) / 
  (fs9_valid$`LGR5` | fs9_valid$`NOTCH2` |  fs9_valid$`COL1A2` | fs9_valid$`SERINC5`) /
  legend_valid + plot_layout(heights =
                            c(3,          # Part 1
                            0.5, 4, 0.5, # Part 2
                            0.5, 4, 0.5  # Part 3
                            ))

S10. Hormone receptors and associated transc factors

Supplemental Figure 9 – Hormone Receptors and associated Pioneer Transcription Factors show fluctuating expression across fallopian tube driven by the menstrual cycle

# This function makes all of the boxplots for figure S10 according to the same specifications


FigS10_gene_list <- c("ESR1", "PGR", "AR", "FOXA2", "PBX1")

make_figS10 <- function(genes = FigS10_gene_list, dataset = all_q_norm, title = "All"){

  graphs_up_Fig <- lapply(X = genes, FUN = Cil_v_Sec_Anat_Boxplot_Points, dataset = dataset)
  
  Fig_resize <- lapply(graphs_up_Fig, adj_text_size, pnt = 17)
  Fig_remove_legend <- lapply(Fig_resize, remove_legend)
  Fig_sig <- lapply(Fig_remove_legend, add_sig, pnt = 4)

  return(Fig_sig)
  
  }


# Create all of the Boxplots for the Discovery Dataset

FS10_disc <- make_figS10(genes = FigS10_gene_list , dataset = disc_q_norm)
names(FS10_disc) <- FigS10_gene_list

# Create all of the Boxplots for the Validation Dataset

FS10_valid <- make_figS10(genes = FigS10_gene_list , dataset = valid_q_norm)
names(FS10_valid) <- FigS10_gene_list
# create grobs
top <-   grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset: Hormone Receptors and Associated Pioneer Transcription Factors', gp=gpar(fontsize=25)))

bottom <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Validation Dataset: Hormone Receptors and Associated Pioneer Transcription Factors', gp=gpar(fontsize=25)))



  wrap_elements(top) / 
  (FS10_disc$ESR1 | FS10_disc$PGR |  FS10_disc$AR | FS10_disc$FOXA2 | FS10_disc$PBX1) /
  legend_disc /
  wrap_elements(bottom) / 
  (FS10_valid$ESR1 | FS10_valid$PGR |  FS10_valid$AR | FS10_valid$FOXA2 | FS10_valid$PBX1) /
  legend_valid +
  plot_layout(heights = c(0.5, 4, 0.5, 0.5, 4, 0.5))